package simplecrawler;

import threads.*;
import java.net.*;
import java.util.Vector;

/**
 * Crawler worker that handles one URL per {@link #process(Object)} call.
 *
 * <p>URLs whose file part ends in {@code .html}, {@code .htm} or {@code .php}
 * are written to a local file via {@code SaveURL.writeTextURLtoFile}. Any other
 * URL is fetched only if the server reports a {@code text...} content type, and
 * the links found in it are pushed onto the work queue.
 *
 * <p>The members {@code arul}, {@code initURL}, {@code tc}, {@code queue} and
 * {@code level} are not declared in this class; they are presumably inherited
 * from {@code ControllableThread} (NOTE(review): confirm against that class).
 */
public class PageSaverThread extends ControllableThread {

    /**
     * Processes a single work item taken from the queue.
     *
     * <p>Processing is best-effort: any failure while handling the item is
     * swallowed so that one bad URL does not stop the worker.
     *
     * @param o the work item; expected to be a {@link URL}. Anything else
     *          (including {@code null}) is skipped silently.
     */
    public void process(Object o) {
        // Skip unexpected items explicitly instead of relying on a swallowed
        // ClassCastException / NullPointerException.
        if (!(o instanceof URL)) {
            return;
        }
        URL pageURL = (URL) o;

        try {
            String filename = pageURL.getFile().toLowerCase();

            // Pages recognised by extension are saved and not processed further here.
            if (hasPageExtension(filename)) {
                savePage(pageURL, filename);
                return;
            }

            // Anything that is not reported as text is not interesting.
            if (!hasTextContentType(pageURL)) {
                return;
            }

            enqueueLinks(pageURL);
        } catch (Exception ignored) {
            // Deliberately ignored: processing of this object has failed, and
            // the crawler simply moves on to the next queue entry.
        }
    }

    /** Tells whether the lower-cased file part of a URL names a html or php page. */
    private static boolean hasPageExtension(String filename) {
        return filename.endsWith(".html")
                || filename.endsWith(".htm")
                || filename.endsWith(".php");
    }

    /**
     * Saves the page to a file named after the host followed by the URL's file
     * part with every {@code '/'} replaced by {@code '-'}. A failure is reported
     * on standard output and otherwise ignored.
     */
    private void savePage(URL pageURL, String filename) {
        String target = pageURL.getHost() + filename.replace('/', '-');
        try {
            SaveURL.writeTextURLtoFile(pageURL, target, arul);
        } catch (Exception e) {
            System.out.println("Saving to file " + target + " from URL " + pageURL.toString() + " failed due to a " + e.toString());
        }
    }

    /**
     * Asks the server for the content type of the URL and tells whether it
     * starts with {@code "text"}.
     *
     * @return {@code false} when the content type is unknown ({@code null}) or
     *         does not start with {@code "text"}
     * @throws java.io.IOException if the connection cannot be opened
     */
    private static boolean hasTextContentType(URL pageURL) throws java.io.IOException {
        URLConnection connection = pageURL.openConnection();
        try {
            // getContentType() returns null when the type is not known.
            String mimetype = connection.getContentType();
            return mimetype != null && mimetype.startsWith("text");
        } finally {
            // This connection is only used to probe the header; release it
            // rather than leaving it open with an unread response body.
            if (connection instanceof HttpURLConnection) {
                ((HttpURLConnection) connection).disconnect();
            }
        }
    }

    /**
     * Downloads the page, extracts its links and pushes each one onto the queue.
     *
     * @throws Exception whatever the download, the link extraction or the
     *                   queue raises; the caller treats it as a failed item
     */
    private void enqueueLinks(URL pageURL) throws Exception {
        String rawPage = SaveURL.getURL(pageURL);
        // Lower-cased copy with every whitespace character turned into a blank,
        // handed to the extractor together with the untouched page text.
        String smallPage = rawPage.toLowerCase().replaceAll("\\s", " ");

        // Treat the content as a html file and try to extract links.
        Vector links = SaveURL.extractLinks(rawPage, smallPage, initURL);

        for (int n = 0; n < links.size(); n++) {
            try {
                // Links might be relative to the current page.
                URL link = new URL(pageURL, (String) links.elementAt(n));
                // If layers are not used (max level -1), write everything
                // into the same layer; otherwise go one layer deeper.
                if (tc.getMaxLevel() == -1) {
                    queue.push(link, level);
                } else {
                    queue.push(link, level + 1);
                }
            } catch (MalformedURLException e) {
                // Ignore malformed URLs; the link extractor might have failed.
            }
        }
    }
}
